set (PACKAGE_BUGREPORT https://github.com/BYVoid/Opencc/issues)
set (OPENCC_VERSION_MAJOR 1)
set (OPENCC_VERSION_MINOR 0)
-set (OPENCC_VERSION_REVISION 4)
+set (OPENCC_VERSION_REVISION 5)
if (CMAKE_BUILD_TYPE MATCHES Debug)
set (version_suffix .Debug)
######## Windows
-if (WIN32)
- set(CMAKE_SHARED_LIBRARY_PREFIX ${CMAKE_INSTALL_PREFIX})
- set(CMAKE_STATIC_LIBRARY_PREFIX ${CMAKE_INSTALL_PREFIX})
-endif (WIN32)
+# No Windows-specific overrides: CMAKE_SHARED_LIBRARY_PREFIX and
+# CMAKE_STATIC_LIBRARY_PREFIX are library file-name prefixes, not install
+# locations, so they are left at the platform defaults.
######## Mac OS X
set (DIR_INCLUDE ${DIR_PREFIX}/include/)
set (DIR_SHARE ${DIR_PREFIX}/share/)
set (DIR_ETC ${DIR_PREFIX}/etc/)
-set (LIB_SUFFIX "")
set (DIR_LIBRARY ${DIR_PREFIX}/lib${LIB_SUFFIX}/)
if (DEFINED SHARE_INSTALL_PREFIX)
set (DIR_ETC ${SYSCONF_INSTALL_DIR})
endif (DEFINED SYSCONF_INSTALL_DIR)
-set (DIR_SHARE_OPENCC ${DIR_SHARE}opencc/)
-set (DIR_SHARE_LOCALE ${DIR_SHARE}locale/)
+if (DEFINED LIB_INSTALL_DIR)
+ set (DIR_LIBRARY ${LIB_INSTALL_DIR})
+endif (DEFINED LIB_INSTALL_DIR)
+
+set (DIR_SHARE_OPENCC ${DIR_SHARE}/opencc/)
+set (DIR_SHARE_LOCALE ${DIR_SHARE}/locale/)
######## Configuration
endif ()
elseif ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "MSVC")
add_definitions(
- /Wall
+ /W4
/D "_CRT_SECURE_NO_WARNINGS"
)
endif()
# Change History of OpenCC
+## Version 1.0.5
+
+2017年2月6日
+
+* 修正Windows下CMake和Visual Studio的問題。
+* 修正FNV Hash的32位編譯警告。
+* 增加若干臺灣常用詞彙轉換和異體字轉換。
+* 增加和修正若干轉換問題。
+* 加快Node模塊編譯速度。
+* 增加Node模塊的詞典轉換接口和Promise接口。
+
## Version 1.0.4
2016年4月1日
# Open Chinese Convert 開放中文轉換
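+[ ![Download](https://api.bintray.com/packages/byvoid/opencc/OpenCC/images/download.svg) ](https://bintray.com/byvoid/opencc/OpenCC/_latestVersion)
+[![Build Status](https://travis-ci.org/BYVoid/OpenCC.svg?branch=master)](https://travis-ci.org/BYVoid/OpenCC)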
+
## Introduction 介紹
Open Chinese Convert (OpenCC, 開放中文轉換) is an open-source project for conversion between Traditional Chinese and Simplified Chinese, supporting character-level conversion, phrase-level conversion, variant conversion and regional idioms among Mainland China, Taiwan and Hong Kong.
* [Ubuntu](https://launchpad.net/ubuntu/+source/opencc)
* [Fedora](https://admin.fedoraproject.org/pkgdb/package/opencc/)
* [Arch Linux](https://www.archlinux.org/packages/community/x86_64/opencc/)
-* [Mac OS](https://github.com/mxcl/homebrew/blob/master/Library/Formula/opencc.rb)
+* [Mac OS](https://github.com/Homebrew/homebrew-core/blob/master/Formula/opencc.rb)
* [Node.js](https://npmjs.org/package/opencc)
## Download 下載
## Build 編譯
-[](https://travis-ci.org/BYVoid/OpenCC)
-
### Build with CMake
Linux (gcc 4.6 is required):
Windows MSYS:
```
-cmake .. -G "MSYS Makefiles" -DCMAKE_INSTALL_PREFIX="" -DCMAKE_BUILD_TYPE=Release
-make
+cmake -H. -Bbuild -G "MSYS Makefiles" -DCMAKE_INSTALL_PREFIX="path/to/install" -DCMAKE_BUILD_TYPE=Release
+cmake --build build --config Release --target install
```
Windows Visual Studio (2013 or higher required):
```
-cmake .. -G "Visual Studio 12" -DCMAKE_INSTALL_PREFIX="" -DCMAKE_BUILD_TYPE=Release
-make
+cmake -H. -Bbuild -G"Visual Studio 12" -DCMAKE_INSTALL_PREFIX="path/to/install"
+cmake --build build --config Release --target install
```
### iOS
* [Flandre Scarlet](https://github.com/XadillaX)
* [宋辰文](https://github.com/songchenwen)
* [iwater](https://github.com/iwater)
+* [Xpol Wan](https://github.com/xpol)
+* [Weihang Lo](https://github.com/weihanglo)
+* [Cychih](https://github.com/pi314)
+* [kyleskimo](https://github.com/kyleskimo)
+* [Ryuan Choi](https://github.com/bunhere)
Please update this list if you have contributed to OpenCC.
"includes": [
"node/global.gypi",
"node/configs.gypi",
- "node/opencc_dict.gypi",
"node/dicts.gypi",
"node/node_binding.gypi",
]
${DICT}.ocd
COMMENT
"Building ${DICT}.ocd"
+ COMMAND
+ ${CMAKE_COMMAND} -E copy "$<TARGET_FILE:libopencc>" "$<TARGET_FILE_DIR:${OPENCC_DICT_BIN}>"
COMMAND
${OPENCC_DICT_BIN}
--input ${DICT_${DICT}_INPUT}
𫠒 鱆
𫠖 𩿅
𫠜 齯
+𫢸 僤
+𫮃 墠
+𫰛 娙
+𫶇 嵽
+𫷷 廞
+𫸩 彄
+𬀩 暐
𬬭 錀
𬬻 鑪
𬭊 𨧀
𬭛 𨨏
𬭳 𨭎
𬭶 𨭆
+𬶋 鮈
+𬶍 鮀
+𬶏 鮠
+𬶟 鯻
+𬸪 鷭
下注解 下註解
下游 下游
下游工业 下游工業
-下确界 下确界
+下确界 下確界
下种 下種
下笔千言 下筆千言
下签 下籤
僵固 僵固
僵固性 僵固性
僵尸 殭屍
-僵尸网络 僵屍網絡
+僵尸网络 殭屍網絡
僵局 僵局
僵持 僵持
僵持不下 僵持不下
几下 幾下
几世 幾世
几世纪 幾世紀
-几丝 几絲
+几丝 幾絲
几两 幾兩
几个 幾個
几个人 幾個人
千里之行 千里之行
千里命驾 千里命駕
千里始足下 千里始足下
-千里姻缘一线牵 千裏姻緣一線牽
+千里姻缘一线牵 千里姻緣一線牽
千里寄鹅毛 千里寄鵝毛
千里搭长棚 千里搭長棚
千里犹面 千里猶面
受制 受制
受制于 受制於
受制于人 受制於人
-受命于天 受命于天
+受命于天 受命於天
受困 受困
受夠了 受夠了
受尽 受盡
受托者 受託者
受折磨 受折磨
受用不尽 受用不盡
-受聘于 受聘于
+受聘于 受聘於
受阻于 受阻於
受限于 受限於
受难曲 受難曲
吊祭 弔祭
吊稍 吊稍
吊窗 吊窗
-吊篮 弔籃
+吊篮 吊籃
吊索 吊索
吊纸 弔紙
吊线 吊線
吊脚儿事 弔腳兒事
吊腰撒跨 弔腰撒跨
吊膀子 吊膀子
-吊臂 弔臂
+吊臂 吊臂
吊衣架 吊衣架
吊袜 吊襪
吊袜带 吊襪帶
哪里 哪裏
哪里买 哪裏買
哪里人 哪裏人
-哪里哪里 哪里哪里
+哪里哪里 哪裏哪裏
哪里摔倒哪里爬 哪裏摔倒哪裏爬
哭个 哭個
哭个夠 哭個夠
大不里士 大不里士
大丑 大丑
大专同学 大專同學
-大专杯 大專杯
+大专杯 大專盃
大业千秋 大業千秋
大个 大個
大个儿 大個兒
威布里吉 威布里吉
威廉亚历山大 威廉亞歷山大
威比苏诺 威比蘇諾
-威氏注音法 威氏註音法
+威氏注音法 威氏注音法
威里斯 威里斯
威风八面 威風八面
娇娘 嬌娘
尼布甲尼撒 尼布甲尼撒
尼庵 尼庵
尼采 尼采
-尼龙布 尼龍佈
+尼龙布 尼龍布
尽世 盡世
尽义务 盡義務
尽了 盡了
干片 乾片
干犯 干犯
干犯法 幹犯法
-干球温度 幹球溫度
+干球温度 乾球溫度
干甚 幹甚
干甚么 幹甚麼
干生受 乾生受
战术轰炸 戰術轟炸
战栗 戰慄
战略伙伴 戰略伙伴
-战略防御倡议 戰略防御倡議
+战略防御倡议 戰略防禦倡議
战胜 戰勝
战胜国 戰勝國
战表 戰表
戴姆勒克莱斯勒 戴姆勒克萊斯勒
戴希穆克 戴希穆克
戴瑞克罗 戴瑞克羅
-戴维斯杯 戴維斯杯
+戴维斯杯 戴維斯盃
戴胜益 戴勝益
戴胜通 戴勝通
戴蒙 戴蒙
抵押借款 抵押借款
抵挡不了 抵擋不了
抵牾 牴牾
-抵觸 牴觸
+抵触 牴觸
抵针 抵針
抹了 抹了
抹布 抹布
新艺术 新藝術
新艺综合体 新藝綜合體
新芬党 新芬黨
-新药 新葯
+新药 新藥
新莺出谷 新鶯出谷
新规范 新規範
新闻价值 新聞價值
杠头 槓頭
杠子 槓子
杠杆 槓桿
-杠杆收购 杠杆收購
+杠杆收购 槓桿收購
杠杠 槓槓
杠牌 槓牌
杠着 槓着
杯酒解怨 杯酒解怨
杯酒言欢 杯酒言歡
杯酒释兵权 杯酒釋兵權
-杯里 杯里
+杯里 杯裏
杯面 杯麪
杰乐米 傑樂米
杰伊汉港 傑伊漢港
核儿 核兒
核冬天 核冬天
核准 覈准
-核准的 覈準的
+核准的 覈准的
核减 覈減
核出口控制 核出口控制
核力 核力
注释 註釋
注重 注重
注销 註銷
-注音 註音
+注音 注音
注音一式 注音一式
注音字母 注音字母
注音文 注音文
-注音法 註音法
+注音法 注音法
注音符号 注音符號
泪出痛肠 淚出痛腸
泪如泉涌 淚如泉涌
石油输出国家组织 石油輸出國家組織
石油输出国组织 石油輸出國組織
石灰岩 石灰岩
-石灰岩洞 石灰巖洞
+石灰岩洞 石灰岩洞
石炭系 石炭系
石版术 石版術
石百合 石百合
种地 種地
种姓 種姓
种姓制 種姓制
-种姓制度 種姓製度
+种姓制度 種姓制度
种子 種子
种子园 種子園
种子地 種子地
签证 簽證
签证费 簽證費
签诗 籤詩
-签语饼 簽語餅
+签语饼 籤語餅
签赌 簽賭
签赌案 簽賭案
签赌站 簽賭站
给于 給於
给价 給價
给出 給出
-给我干脆 給我干脆
+给我干脆 給我乾脆
给药 給藥
绚丽多彩 絢麗多彩
绚烂归于平淡 絢爛歸於平淡
致理技术学院 致理技術學院
致用 致用
致电 致電
-致畸 緻畸
+致畸 致畸
致疑 致疑
致病 致病
致病性 致病性
计穷虑极 計窮慮極
计算出 計算出
计算出来 計算出來
-计算机制图 計算機制圖
-计算机集成制造 計算機集成制造
+计算机制图 計算機製圖
+计算机集成制造 計算機集成製造
计量制 計量制
订个 訂個
订了 訂了
足于 足於
足协杯 足協盃
足坛 足壇
-足总杯 足總杯
+足总杯 足總盃
足食丰衣 足食豐衣
趸售物价 躉售物價
趸当 躉當
阿扎伦卡 阿紮倫卡
阿扎尼亚 阿扎尼亞
阿托品 阿托品
-阿拉伯共同市场 阿拉伯共衕市場
+阿拉伯共同市场 阿拉伯共同市場
阿拉伯联合大公国 阿拉伯聯合大公國
阿拉伯联合酋长国 阿拉伯聯合酋長國
阿拉克 阿拉克
须发文 須發文
须发皆白 鬚髮皆白
须发表 須發表
-须后水 須後水
+须后水 鬚後水
须子 鬚子
须将有日思无日 須將有日思無日
须弥 須彌
香熏疗法 香薰療法
香皂 香皂
香菜叶 香菜葉
-香蜡 香
+香蜡 香蠟
香蜡店 香蠟店
香蜡纸马 香蠟紙馬
香蜡铺 香蠟鋪
涼菜 冷盤
砹 砈
硅 矽
+納米 奈米
詞組 片語
蹦極 笨豬跳
輔音 子音
嬀 媯
峯 峰
幺 么
+擡 抬
曬 晒
棱 稜
+檐 簷
污 汙
泄 洩
涌 湧
睾 睪
竈 灶
糉 粽
+繮 韁
纔 才
羣 群
蔿 蒍
裏 裡
覈 核
踊 踴
+鉢 缽
鮎 鯰
麪 麵
+齶 顎
云 雲 云 「云」意義爲「說」,其餘用「雲」。 人云亦云 雲霧
仆 僕 仆 「仆」意義爲「跌倒」,讀音pu1,「僕」爲「供人使喚的人」,讀音pu2。 前仆後繼 仆街 奴僕 公僕 風塵僕僕
舍 舍 捨 「捨」讀作she3,用於「放棄」意義,其餘用「舍」,讀作she4,古文亦同「捨」。 宿舍 村舍 退避三舍 捨弃 舍我其誰 不舍晝夜
-签 籖 簽 「簽」用於動詞,表示「題字題名」,其餘用「籤」。 簽名 簽證 標籤 書籤 牙籤
+签 籤 簽 「簽」用於動詞,表示「題字題名」,其餘用「籤」。 簽名 簽證 標籤 書籤 牙籤
折 折 摺 與「叠」有關用「摺」,與「斷」有關用「折」。 摺紙 摺扇 存摺 折斷 折腰 折服 打折 損兵折將
谷 谷 穀 表示「兩山之間」的地域用「谷」,表示農作物時用「穀」。 山谷 稻穀
几 幾 几 「几」只用作「茶几」。表示「幾乎」、「幾個」意義用「幾」。 茶几 幾乎 幾個
DIRECTORY
${CMAKE_BINARY_DIR}/doc/html
DESTINATION
- ${DIR_SHARE_OPENCC}doc
+ ${DIR_SHARE_OPENCC}/doc
)
set_directory_properties(
#include "Config.hpp"
#include "Converter.hpp"
+#include "DictConverter.hpp"
+
+// For faster build
+#include "BinaryDict.cpp"
+#include "Config.cpp"
+#include "Conversion.cpp"
+#include "ConversionChain.cpp"
+#include "Converter.cpp"
+#include "DartsDict.cpp"
+#include "Dict.cpp"
+#include "DictConverter.cpp"
+#include "DictEntry.cpp"
+#include "DictGroup.cpp"
+#include "MaxMatchSegmentation.cpp"
+#include "Segmentation.cpp"
+#include "TextDict.cpp"
+#include "UTF8Util.cpp"
using namespace opencc;
return converter_->Convert(input);
}
+ // JS-callable method that returns VERSION (presumably a build-time define —
+ // confirm) as a JavaScript string. It takes no arguments and does not touch
+ // any OpenccBinding instance.
+ static NAN_METHOD(Version) {
+ info.GetReturnValue().Set(Nan::New<v8::String>(VERSION).ToLocalChecked());
+ }
+
static NAN_METHOD(New) {
OpenccBinding* instance;
try {
if (info.Length() >= 1 && info[0]->IsString()) {
- string configFile = ToUtf8String(info[0]);
+ const string configFile = ToUtf8String(info[0]);
instance = new OpenccBinding(configFile);
} else {
instance = new OpenccBinding("s2t.json");
OpenccBinding* instance = Nan::ObjectWrap::Unwrap<OpenccBinding>(info.This());
- string input = ToUtf8String(info[0]);
+ const string input = ToUtf8String(info[0]);
string output;
try {
output = instance->Convert(input);
info.GetReturnValue().Set(converted);
}
+ // JS-callable method generateDict(inputFileName, outputFileName, formatFrom,
+ // formatTo): converts a dictionary file by calling opencc::ConvertDictionary.
+ // Throws a JS TypeError unless the first four arguments are all strings, and
+ // rethrows an opencc::Exception as a JS Error. No return value is set.
+ // NOTE(review): the helpers behind ConvertDictionary call exit(2) when given
+ // an unrecognized format name, which would terminate the Node process rather
+ // than raise a catchable error — confirm whether callers rely on this.
+ static NAN_METHOD(GenerateDict) {
+ if (info.Length() < 4 || !info[0]->IsString() || !info[1]->IsString()
+ || !info[2]->IsString() || !info[3]->IsString()) {
+ Nan::ThrowTypeError("Wrong arguments");
+ return;
+ }
+ // Copy the JS strings into UTF-8 std::strings before calling into OpenCC.
+ const string inputFileName = ToUtf8String(info[0]);
+ const string outputFileName = ToUtf8String(info[1]);
+ const string formatFrom = ToUtf8String(info[2]);
+ const string formatTo = ToUtf8String(info[3]);
+ try {
+ opencc::ConvertDictionary(inputFileName, outputFileName, formatFrom, formatTo);
+ } catch (opencc::Exception& e) {
+ // Surface library failures to JavaScript instead of unwinding through V8.
+ Nan::ThrowError(e.what());
+ }
+ }
+
static NAN_MODULE_INIT(Init) {
// Prepare constructor template
v8::Local<v8::FunctionTemplate> tpl = Nan::New<v8::FunctionTemplate>(OpenccBinding::New);
tpl->SetClassName(Nan::New("Opencc").ToLocalChecked());
tpl->InstanceTemplate()->SetInternalFieldCount(1);
+ // Methods
+ Nan::SetMethod(tpl, "version", Version);
+ Nan::SetMethod(tpl, "generateDict", GenerateDict);
// Prototype
Nan::SetPrototypeMethod(tpl, "convert", Convert);
Nan::SetPrototypeMethod(tpl, "convertSync", ConvertSync);
*/
// In your project you should replace './opencc' with 'opencc'
-var OpenCC = require('./opencc');
+const OpenCC = require('./opencc');
+
+console.log('OpenCC version', OpenCC.version);
// Load the default Simplified to Traditional config
-var opencc = new OpenCC('s2t.json');
+const opencc = new OpenCC('s2t.json');
// Sync API
-var converted = opencc.convertSync("汉字");
+const converted = opencc.convertSync("汉字");
console.log(converted);
// Async API
-opencc.convert("汉字", function (err, converted) {
+opencc.convert("汉字", (err, converted) => {
+ console.log(err, converted);
+});
+
+// Async API with Promise
+opencc.convertPromise("汉字").then(converted => {
console.log(converted);
});
--- /dev/null
+// Command-line helper that converts a text dictionary into the "ocd" format.
+// Usage: node dict.js <input text dictionary> <output ocd file>
+const OpenCC = require('./opencc');
+
+// argv[0] is the node executable and argv[1] is this script, so the
+// user-supplied paths start at index 2.
+const input = process.argv[2];
+const output = process.argv[3];
+
+OpenCC.generateDict(input, output, "text", "ocd");
"target_name": "dicts",
"type": "none",
"variables": {
- "cmd": "<(PRODUCT_DIR)/opencc_dict",
+ "cmd": "<(module_root_dir)/node/dict.js",
"dict_merge": "<(module_root_dir)/data/scripts/merge.py",
"dict_reverse": "<(module_root_dir)/data/scripts/reverse.py",
"input_prefix": "<(module_root_dir)/data/dictionary/",
"variables": {
"input": "<(input_prefix)STCharacters.txt",
},
- "inputs": ["<(cmd)", "<(input)"],
+ "inputs": ["<(input)"],
"outputs": ["<(output_prefix)STCharacters.ocd"],
- "action": ["<(cmd)", "-i", "<(input)", "-o", "<@(_outputs)", "--from text", "--to ocd"]
+ "action": ["node", "<(cmd)", "<(input)", "<@(_outputs)"]
}, {
"action_name": "STPhrases",
"variables": {
"input": "<(input_prefix)STPhrases.txt",
},
- "inputs": ["<(cmd)", "<(input)"],
+ "inputs": ["<(input)"],
"outputs": ["<(output_prefix)STPhrases.ocd"],
- "action": ["<(cmd)", "-i", "<(input)", "-o", "<@(_outputs)", "--from text", "--to ocd"]
+ "action": ["node", "<(cmd)", "<(input)", "<@(_outputs)"]
}, {
"action_name": "TSCharacters",
"variables": {
"input": "<(input_prefix)TSCharacters.txt",
},
- "inputs": ["<(cmd)", "<(input)"],
+ "inputs": ["<(input)"],
"outputs": ["<(output_prefix)TSCharacters.ocd"],
- "action": ["<(cmd)", "-i", "<(input)", "-o", "<@(_outputs)", "--from text", "--to ocd"]
+ "action": ["node", "<(cmd)", "<(input)", "<@(_outputs)"]
}, {
"action_name": "TSPhrases",
"variables": {
"input": "<(input_prefix)TSPhrases.txt",
},
- "inputs": ["<(cmd)", "<(input)"],
+ "inputs": ["<(input)"],
"outputs": ["<(output_prefix)TSPhrases.ocd"],
- "action": ["<(cmd)", "-i", "<(input)", "-o", "<@(_outputs)", "--from text", "--to ocd"]
+ "action": ["node", "<(cmd)", "<(input)", "<@(_outputs)"]
}, {
"action_name": "TWVariants",
"variables": {
"input": "<(input_prefix)TWVariants.txt",
},
- "inputs": ["<(cmd)", "<(input)"],
+ "inputs": ["<(input)"],
"outputs": ["<(output_prefix)TWVariants.ocd"],
- "action": ["<(cmd)", "-i", "<(input)", "-o", "<@(_outputs)", "--from text", "--to ocd"]
+ "action": ["node", "<(cmd)", "<(input)", "<@(_outputs)"]
}, {
"action_name": "TWVariantsRevPhrases",
"variables": {
"input": "<(input_prefix)TWVariantsRevPhrases.txt",
},
- "inputs": ["<(cmd)", "<(input)"],
+ "inputs": ["<(input)"],
"outputs": ["<(output_prefix)TWVariantsRevPhrases.ocd"],
- "action": ["<(cmd)", "-i", "<(input)", "-o", "<@(_outputs)", "--from text", "--to ocd"]
+ "action": ["node", "<(cmd)", "<(input)", "<@(_outputs)"]
}, {
"action_name": "JPVariants",
"variables": {
"input": "<(input_prefix)JPVariants.txt",
},
- "inputs": ["<(cmd)", "<(input)"],
+ "inputs": ["<(input)"],
"outputs": ["<(output_prefix)JPVariants.ocd"],
- "action": ["<(cmd)", "-i", "<(input)", "-o", "<@(_outputs)", "--from text", "--to ocd"]
+ "action": ["node", "<(cmd)", "<(input)", "<@(_outputs)"]
}, {
"action_name": "TWPhrases.txt",
"inputs": ["<(cmd)"],
"variables": {
"input": "<(input_prefix)TWVariants.txt",
},
- "inputs": ["<(cmd)", "<(input)"],
+ "inputs": ["<(input)"],
"outputs": ["<(output_prefix)TWVariantsRev.txt"],
"action": ["python", "<(dict_reverse)", "<(input)", "<@(_outputs)"]
}, {
"variables": {
"input": "<(output_prefix)TWPhrases.txt",
},
- "inputs": ["<(cmd)", "<(input)"],
+ "inputs": ["<(input)"],
"outputs": ["<(output_prefix)TWPhrasesRev.txt"],
"action": ["python", "<(dict_reverse)", "<(input)", "<@(_outputs)"]
}, {
"variables": {
"input": "<(output_prefix)TWPhrases.txt",
},
- "inputs": ["<(cmd)", "<(input)"],
+ "inputs": ["<(input)"],
"outputs": ["<(output_prefix)TWPhrases.ocd"],
- "action": ["<(cmd)", "-i", "<(input)", "-o", "<@(_outputs)", "--from text", "--to ocd"]
+ "action": ["node", "<(cmd)", "<(input)", "<@(_outputs)"]
}, {
"action_name": "TWVariantsRev",
"variables": {
"input": "<(output_prefix)TWVariantsRev.txt",
},
- "inputs": ["<(cmd)", "<(input)"],
+ "inputs": ["<(input)"],
"outputs": ["<(output_prefix)TWVariantsRev.ocd"],
- "action": ["<(cmd)", "-i", "<(input)", "-o", "<@(_outputs)", "--from text", "--to ocd"]
+ "action": ["node", "<(cmd)", "<(input)", "<@(_outputs)"]
}, {
"action_name": "TWPhrasesRev",
"variables": {
"input": "<(output_prefix)TWPhrasesRev.txt",
},
- "inputs": ["<(cmd)", "<(input)"],
+ "inputs": ["<(input)"],
"outputs": ["<(output_prefix)TWPhrasesRev.ocd"],
- "action": ["<(cmd)", "-i", "<(input)", "-o", "<@(_outputs)", "--from text", "--to ocd"]
+ "action": ["node", "<(cmd)", "<(input)", "<@(_outputs)"]
}, {
"action_name": "HKVariants",
"variables": {
"input": "<(input_prefix)HKVariants.txt",
},
- "inputs": ["<(cmd)", "<(input)"],
+ "inputs": ["<(input)"],
"outputs": ["<(output_prefix)HKVariants.ocd"],
- "action": ["<(cmd)", "-i", "<(input)", "-o", "<@(_outputs)", "--from text", "--to ocd"]
+ "action": ["node", "<(cmd)", "<(input)", "<@(_outputs)"]
}, {
"action_name": "HKVariantsPhrases",
"variables": {
"input": "<(input_prefix)HKVariantsPhrases.txt",
},
- "inputs": ["<(cmd)", "<(input)"],
+ "inputs": ["<(input)"],
"outputs": ["<(output_prefix)HKVariantsPhrases.ocd"],
- "action": ["<(cmd)", "-i", "<(input)", "-o", "<@(_outputs)", "--from text", "--to ocd"]
+ "action": ["node", "<(cmd)", "<(input)", "<@(_outputs)"]
}, {
"action_name": "HKVariantsRevPhrases",
"variables": {
"input": "<(input_prefix)HKVariantsRevPhrases.txt",
},
- "inputs": ["<(cmd)", "<(input)"],
+ "inputs": ["<(input)"],
"outputs": ["<(output_prefix)HKVariantsRevPhrases.ocd"],
- "action": ["<(cmd)", "-i", "<(input)", "-o", "<@(_outputs)", "--from text", "--to ocd"]
+ "action": ["node", "<(cmd)", "<(input)", "<@(_outputs)"]
}, {
"action_name": "HKVariantsRev.txt",
"variables": {
"input": "<(input_prefix)HKVariants.txt",
},
- "inputs": ["<(cmd)", "<(input)"],
+ "inputs": ["<(input)"],
"outputs": ["<(output_prefix)HKVariantsRev.txt"],
"action": ["python", "<(dict_reverse)", "<(input)", "<@(_outputs)"]
}, {
"variables": {
"input": "<(output_prefix)HKVariantsRev.txt",
},
- "inputs": ["<(cmd)", "<(input)"],
+ "inputs": ["<(input)"],
"outputs": ["<(output_prefix)HKVariantsRev.ocd"],
- "action": ["<(cmd)", "-i", "<(input)", "-o", "<@(_outputs)", "--from text", "--to ocd"]
+ "action": ["node", "<(cmd)", "<(input)", "<@(_outputs)"]
}],
"dependencies": [
- "opencc_dict"
+ "binding"
]
}]
}
{
"variables": {
- "opencc_version": "1.0.4"
+ "opencc_version": "1.0.5"
},
"target_defaults": {
"defines": [
"target_name": "binding",
"sources": [
"../node/binding.cc",
- "../src/BinaryDict.cpp",
- "../src/Config.cpp",
- "../src/Conversion.cpp",
- "../src/ConversionChain.cpp",
- "../src/Converter.cpp",
- "../src/DartsDict.cpp",
- "../src/Dict.cpp",
- "../src/DictEntry.cpp",
- "../src/DictGroup.cpp",
- "../src/MaxMatchSegmentation.cpp",
- "../src/Segmentation.cpp",
- "../src/TextDict.cpp",
- "../src/UTF8Util.cpp",
],
"include_dirs": [
"../src",
this.handler = new binding.Opencc(config);
};
+/**
+ * The version of OpenCC library.
+ *
+ * @fn OpenCC.version
+ * @memberof OpenCC
+ * @ingroup node_api
+ */
+OpenCC.version = binding.Opencc.version();
+
+/**
+ * Converts a dictionary file from one format to another.
+ *
+ * @fn void generateDict(string inputFileName, string outputFileName, string formatFrom, string formatTo)
+ * @memberof OpenCC
+ * @param inputFileName Input dictionary filename.
+ * @param outputFileName Output dictionary filename.
+ * @param formatFrom Input dictionary format ("text" or "ocd").
+ * @param formatTo Output dictionary format ("text" or "ocd").
+ * @return Nothing; the converted dictionary is written to outputFileName.
+ * @ingroup node_api
+ */
+OpenCC.generateDict = function(inputFileName, outputFileName,
+ formatFrom, formatTo) {
+ return binding.Opencc.generateDict(inputFileName, outputFileName,
+ formatFrom, formatTo);
+}
+
/**
* Converts input text.
*
OpenCC.prototype.convertSync = function (input) {
return this.handler.convertSync(input.toString());
};
+
+/**
+ * Promise-based wrapper around the callback-style asynchronous conversion.
+ *
+ * @fn Promise convertPromise(string input)
+ * @memberof OpenCC
+ * @param input Input text; it is passed through toString() before conversion.
+ * @return A Promise resolved with the converted text, or rejected with the
+ *         error reported by the underlying conversion.
+ * @ingroup node_api
+ */
+OpenCC.prototype.convertPromise = function (input) {
+  // Arrow functions keep `this` bound to the OpenCC instance, so no alias
+  // variable is needed inside the executor.
+  return new Promise((resolve, reject) => {
+    const settle = (err, text) => {
+      if (err) {
+        reject(err);
+      } else {
+        resolve(text);
+      }
+    };
+    this.handler.convert(input.toString(), settle);
+  });
+};
+++ /dev/null
-{
- "targets": [{
- "target_name": "opencc_dict",
- "type": "executable",
- "sources": [
- "../src/BinaryDict.cpp",
- "../src/DartsDict.cpp",
- "../src/Dict.cpp",
- "../src/DictEntry.cpp",
- "../src/DictGroup.cpp",
- "../src/TextDict.cpp",
- "../src/UTF8Util.cpp",
- "../src/tools/DictConverter.cpp",
- ],
- "include_dirs": [
- "../src",
- "../deps/darts-clone",
- "../deps/tclap-1.2.1"
- ]
- }]
-}
{
"name": "opencc",
- "version": "1.0.4",
+ "version": "1.0.5",
"description": "Conversion between Traditional and Simplified Chinese",
"author": "BYVoid <byvoid@byvoid.com>",
- "license": "Apache",
+ "license": "Apache-2.0",
"main": "node/opencc.js",
"scripts": {
"test": "mocha -R spec node/test.js"
"mocha": "2.2.5"
},
"dependencies": {
- "nan": "^2.2.0"
+ "nan": "^2.5.1"
}
}
}
void BinaryDict::SerializeToFile(FILE* fp) const {
- string keyBuffer, valueBuffer;
+ string keyBuf, valueBuf;
vector<size_t> keyOffsets, valueOffsets;
size_t keyTotalLength = 0, valueTotalLength = 0;
- ConstructBuffer(keyBuffer, keyOffsets, keyTotalLength, valueBuffer,
+ ConstructBuffer(keyBuf, keyOffsets, keyTotalLength, valueBuf,
valueOffsets, valueTotalLength);
// Number of items
size_t numItems = lexicon->Length();
// Data
fwrite(&keyTotalLength, sizeof(size_t), 1, fp);
- fwrite(keyBuffer.c_str(), sizeof(char), keyTotalLength, fp);
+ fwrite(keyBuf.c_str(), sizeof(char), keyTotalLength, fp);
fwrite(&valueTotalLength, sizeof(size_t), 1, fp);
- fwrite(valueBuffer.c_str(), sizeof(char), valueTotalLength, fp);
+ fwrite(valueBuf.c_str(), sizeof(char), valueTotalLength, fp);
size_t keyCursor = 0, valueCursor = 0;
for (const DictEntry* entry : *lexicon) {
return dict;
}
-void BinaryDict::ConstructBuffer(string& keyBuffer, vector<size_t>& keyOffset,
- size_t& keyTotalLength, string& valueBuffer,
+void BinaryDict::ConstructBuffer(string& keyBuf, vector<size_t>& keyOffset,
+ size_t& keyTotalLength, string& valueBuf,
vector<size_t>& valueOffset,
size_t& valueTotalLength) const {
keyTotalLength = 0;
}
}
// Write keys and values to buffers
- keyBuffer.resize(keyTotalLength, '\0');
- valueBuffer.resize(valueTotalLength, '\0');
- char* pKeyBuffer = const_cast<char*>(keyBuffer.c_str());
- char* pValueBuffer = const_cast<char*>(valueBuffer.c_str());
+ keyBuf.resize(keyTotalLength, '\0');
+ valueBuf.resize(valueTotalLength, '\0');
+ char* pKeyBuffer = const_cast<char*>(keyBuf.c_str());
+ char* pValueBuffer = const_cast<char*>(valueBuf.c_str());
for (const DictEntry* entry : *lexicon) {
strcpy(pKeyBuffer, entry->Key());
- keyOffset.push_back(pKeyBuffer - keyBuffer.c_str());
+ keyOffset.push_back(pKeyBuffer - keyBuf.c_str());
pKeyBuffer += entry->KeyLength() + 1;
if (entry->NumValues() == 1) {
const auto* svEntry = static_cast<const SingleValueDictEntry*>(entry);
strcpy(pValueBuffer, svEntry->Value());
- valueOffset.push_back(pValueBuffer - valueBuffer.c_str());
+ valueOffset.push_back(pValueBuffer - valueBuf.c_str());
pValueBuffer += strlen(svEntry->Value()) + 1;
} else {
const auto* mvEntry = static_cast<const MultiValueDictEntry*>(entry);
for (const auto& value : mvEntry->Values()) {
strcpy(pValueBuffer, value);
- valueOffset.push_back(pValueBuffer - valueBuffer.c_str());
+ valueOffset.push_back(pValueBuffer - valueBuf.c_str());
pValueBuffer += strlen(value) + 1;
}
}
}
- assert(keyBuffer.c_str() + keyTotalLength == pKeyBuffer);
- assert(valueBuffer.c_str() + valueTotalLength == pValueBuffer);
+ assert(keyBuf.c_str() + keyTotalLength == pKeyBuffer);
+ assert(valueBuf.c_str() + valueTotalLength == pValueBuffer);
}
Converter.hpp
DartsDict.hpp
Dict.hpp
+ DictConverter.hpp
DictEntry.hpp
DictGroup.hpp
Exception.hpp
Converter.cpp
DartsDict.cpp
Dict.cpp
+ DictConverter.cpp
DictEntry.cpp
DictGroup.cpp
MaxMatchSegmentation.cpp
UTF8Util.cpp
)
-add_library(libopencc ${LIBOPENCC_SOURCES})
+add_library(libopencc ${LIBOPENCC_SOURCES} ${LIBOPENCC_HEADERS})
+source_group(libopencc FILES ${LIBOPENCC_SOURCES} ${LIBOPENCC_HEADERS})
GENERATE_EXPORT_HEADER(
libopencc
DictPtr ParseDict(const JSONValue& doc) {
// Required: type
string type = GetStringProperty(doc, "type");
- DictPtr dict;
+
if (type == "group") {
list<DictPtr> dicts;
const JSONValue& docs = GetArrayProperty(doc, "dicts");
if (cache != nullptr) {
return cache;
}
+ DictPtr dict;
if (type == "text") {
dict = LoadDictWithPaths<TextDict>(fileName);
} else if (type == "ocd") {
}
ConfigInternal* impl = (ConfigInternal*)internal;
- impl->configDirectory = configDirectory;
+  // Make sure a non-empty directory ends with a path separator. An empty
+  // directory is stored unchanged: calling back() on an empty std::string is
+  // undefined behavior, and appending '/' would turn it into the root path.
+  if (configDirectory.empty() || configDirectory.back() == '/' ||
+      configDirectory.back() == '\\')
+    impl->configDirectory = configDirectory;
+  else
+    impl->configDirectory = configDirectory + '/';
// Required: segmentation
SegmentationPtr segmentation =
}
}
+// Checks that NewFromString accepts a config directory given without a
+// trailing path separator. The test has no explicit assertions; it passes as
+// long as building the converter does not throw.
+TEST_F(ConfigTest, NewFromStringWithoutTrailingSlash) {
+  std::ifstream ifs(CONFIG_TEST_PATH);
+  string content(std::istreambuf_iterator<char>(ifs),
+                 (std::istreambuf_iterator<char>()));
+  string pathWithoutTrailingSlash = CMAKE_SOURCE_DIR "/test/config_test";
+
+  const ConverterPtr converter = config.NewFromString(
+      content, pathWithoutTrailingSlash);
+}
+
} // namespace opencc
fwrite(&dartsSize, sizeof(size_t), 1, fp);
fwrite(dict.array(), sizeof(char), dartsSize, fp);
- auto internal = this->internal;
internal->binary.reset(new BinaryDict(lexicon));
internal->binary->SerializeToFile(fp);
}
if (!result.IsNull()) {
return result;
}
- len -= UTF8Util::PrevCharLength(wordTruncPtr);
+ len -= static_cast<long>(UTF8Util::PrevCharLength(wordTruncPtr));
}
return Optional<const DictEntry*>::Null();
}
string wordTrunc = UTF8Util::TruncateUTF8(word, KeyMaxLength());
const char* wordTruncPtr = wordTrunc.c_str() + wordTrunc.length();
for (long len = static_cast<long>(wordTrunc.length()); len > 0;
- len -= UTF8Util::PrevCharLength(wordTruncPtr)) {
+ len -= static_cast<long>(UTF8Util::PrevCharLength(wordTruncPtr))) {
wordTrunc.resize(static_cast<size_t>(len));
wordTruncPtr = wordTrunc.c_str() + len;
const Optional<const DictEntry*>& result = Match(wordTrunc.c_str());
--- /dev/null
+/*
+ * Open Chinese Convert
+ *
+ * Copyright 2010-2017 BYVoid <byvoid@byvoid.com>
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "DartsDict.hpp"
+#include "DictConverter.hpp"
+#include "TextDict.hpp"
+
+using namespace opencc;
+
+// Loads the dictionary stored in inputFileName. `format` selects the on-disk
+// representation: "text" loads a TextDict and "ocd" loads a DartsDict.
+// NOTE(review): any other format name prints a message to stderr and ends the
+// whole process with exit status 2. This code is now reachable from the Node
+// binding, so reporting the error to the caller may be preferable — confirm.
+DictPtr LoadDictionary(const string& format, const string& inputFileName) {
+ if (format == "text") {
+ return SerializableDict::NewFromFile<TextDict>(inputFileName);
+ } else if (format == "ocd") {
+ return SerializableDict::NewFromFile<DartsDict>(inputFileName);
+ } else {
+ fprintf(stderr, "Unknown dictionary format: %s\n", format.c_str());
+ exit(2);
+ }
+ // Not reached: every branch above either returns or exits.
+ return nullptr;
+}
+
+// Builds a serializable dictionary in the requested `format` from `dict`:
+// "text" produces a TextDict and "ocd" produces a DartsDict.
+// NOTE(review): any other format name prints a message to stderr and ends the
+// whole process with exit status 2 instead of reporting an error to the caller.
+SerializableDictPtr ConvertDict(const string& format,
+ const DictPtr dict) {
+ if (format == "text") {
+ return TextDict::NewFromDict(*dict.get());
+ } else if (format == "ocd") {
+ return DartsDict::NewFromDict(*dict.get());
+ } else {
+ fprintf(stderr, "Unknown dictionary format: %s\n", format.c_str());
+ exit(2);
+ }
+ // Not reached: every branch above either returns or exits.
+ return nullptr;
+}
+
+namespace opencc {
+// Reads the dictionary at inputFileName (stored in formatFrom), converts it to
+// formatTo and serializes the result to outputFileName. The format names
+// handled by the helpers above are "text" and "ocd".
+void ConvertDictionary(const string inputFileName, const string outputFileName,
+ const string formatFrom, const string formatTo) {
+ DictPtr dictFrom = LoadDictionary(formatFrom, inputFileName);
+ SerializableDictPtr dictTo = ConvertDict(formatTo, dictFrom);
+ dictTo->SerializeToFile(outputFileName);
+}
+}
--- /dev/null
+/*
+ * Open Chinese Convert
+ *
+ * Copyright 2010-2017 BYVoid <byvoid@byvoid.com>
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include "Common.hpp"
+
+namespace opencc {
+/**
+* Converts a dictionary file from one format to another.
+* @param inputFileName Path of the dictionary file to read.
+* @param outputFileName Path the converted dictionary is written to.
+* @param formatFrom Format of the input file.
+* @param formatTo Format of the output file.
+* @ingroup opencc_cpp_api
+*/
+void ConvertDictionary(const string inputFileName, const string outputFileName,
+ const string formatFrom, const string formatTo);
+}
size_t NumValues() const { return values.size(); }
vector<const char*> Values() const {
- vector<const char*> values;
+ // Returns one C-string pointer per stored value. The pointers refer to the
+ // strings held in this->values; the local is named so that it does not
+ // shadow that member.
+ vector<const char*> result;
for (const string& value : this->values) {
- values.push_back(value.c_str());
+ result.push_back(value.c_str());
}
- return values;
+ return result;
}
private:
namespace opencc {
-class OPENCC_EXPORT Exception : public std::exception {
+class OPENCC_EXPORT Exception {
public:
Exception() {}
-/*
+/*
* Open Chinese Convert
*
* Copyright 2015 BYVoid <byvoid@byvoid.com>
std::min(static_cast<LengthType>(wordMaxLength + suffixSetLength),
text.UTF8Length());
const UTF8StringSlice& slice = text.Left(suffixLength);
- suffixes.push_back(UTF8StringSlice8Bit(slice.CString(), slice.UTF8Length(),
- slice.ByteLength()));
+ suffixes.push_back(UTF8StringSlice8Bit(slice.CString(),
+ static_cast<UTF8StringSlice8Bit::LengthType>(slice.UTF8Length()),
+ static_cast<UTF8StringSlice8Bit::LengthType>(slice.ByteLength())));
}
suffixes.shrink_to_fit();
// Sort suffixes
std::min(static_cast<LengthType>(wordMaxLength + prefixSetLength),
text.UTF8Length());
const UTF8StringSlice& slice = text.Right(prefixLength);
- prefixes.push_back(UTF8StringSlice8Bit(slice.CString(), slice.UTF8Length(),
- slice.ByteLength()));
+ prefixes.push_back(UTF8StringSlice8Bit(slice.CString(),
+ static_cast<UTF8StringSlice8Bit::LengthType>(slice.UTF8Length()),
+ static_cast<UTF8StringSlice8Bit::LengthType>(slice.ByteLength())));
+
}
prefixes.shrink_to_fit();
// Sort suffixes reversely
ExtractSuffixes();
}
for (const auto& suffix : suffixes) {
- for (size_t i = 1; i <= suffix.UTF8Length() && i <= wordMaxLength; i++) {
+ for (UTF8StringSlice8Bit::LengthType i = 1; i <= suffix.UTF8Length() && i <= wordMaxLength; i++) {
const UTF8StringSlice8Bit wordCandidate = suffix.Left(i);
signals->AddKey(wordCandidate).frequency++;
totalOccurrence++;
const std::function<void(const PhraseExtract::UTF8StringSlice8Bit& word,
AdjacentSetType& adjacentSet)>& updateEntropy) {
AdjacentSetType adjacentSet;
+ auto setLength8Bit = static_cast<PhraseExtract::UTF8StringSlice8Bit::LengthType>(setLength);
for (PhraseExtract::LengthType length = wordMinLength;
length <= wordMaxLength; length++) {
adjacentSet.clear();
if (presuffix.UTF8Length() < length) {
continue;
}
+ auto length8Bit = static_cast<PhraseExtract::UTF8StringSlice8Bit::LengthType>(length);
const auto& wordCandidate =
- SUFFIX ? presuffix.Left(length) : presuffix.Right(length);
+ SUFFIX ? presuffix.Left(length8Bit) : presuffix.Right(length8Bit);
if (wordCandidate != lastWord) {
updateEntropy(lastWord, adjacentSet);
lastWord = wordCandidate;
}
if (length + setLength <= presuffix.UTF8Length()) {
if (SUFFIX) {
- const auto& wordSuffix = presuffix.SubString(length, setLength);
+ const auto& wordSuffix = presuffix.SubString(length8Bit, setLength8Bit);
adjacentSet[wordSuffix]++;
} else {
const auto& wordPrefix = presuffix.SubString(
- presuffix.UTF8Length() - length - setLength, setLength);
+ presuffix.UTF8Length() - length8Bit - setLength8Bit, setLength8Bit);
adjacentSet[wordPrefix]++;
}
}
const UTF8StringSlice8Bit& wordCandidate) const {
// TODO Try average value
double minPMI = INFINITY;
- for (LengthType leftLength = 1; leftLength <= wordCandidate.UTF8Length() - 1;
+ for (UTF8StringSlice8Bit::LengthType leftLength = 1; leftLength <= wordCandidate.UTF8Length() - 1;
leftLength++) {
const auto& leftPart = wordCandidate.Left(leftLength);
const auto& rightPart =
namespace opencc {
-class PhraseExtract {
+class OPENCC_EXPORT PhraseExtract {
public:
typedef UTF8StringSlice::LengthType LengthType;
#endif
int opencc_close(opencc_t opencc) {
- try {
- SimpleConverter* instance = reinterpret_cast<SimpleConverter*>(opencc);
- delete instance;
- return 0;
- } catch (std::exception& ex) {
- cError = ex.what();
- return 1;
- }
+ SimpleConverter* instance = reinterpret_cast<SimpleConverter*>(opencc);
+ delete instance;
+ return 0;
}
size_t opencc_convert_utf8_to_buffer(opencc_t opencc, const char* input,
return FNVHash(text, byteLength, 16777619UL, 2166136261UL);
}
+#if SIZE_MAX == 0xffffffffffffffff
template <>
inline size_t FNVHash<8>(const char* text, const size_t byteLength) {
return FNVHash(text, byteLength, 1099511628211UL, 14695981039346656037UL);
}
+#endif
} // namespace internal
typedef LENGTH_TYPE LengthType;
UTF8StringSliceBase(const char* _str)
- : str(_str), utf8Length(UTF8Util::Length(_str)),
- byteLength(strlen(_str)) {}
+ : str(_str), utf8Length(static_cast<LengthType>(UTF8Util::Length(_str))),
+ byteLength(static_cast<LengthType>(strlen(_str))) {}
UTF8StringSliceBase(const char* _str, const LengthType _utf8Length)
: str(_str), utf8Length(_utf8Length) {
LengthType ByteLength() const { return byteLength; }
- UTF8StringSliceBase Left(const LengthType utf8Length) const {
- if (utf8Length == UTF8Length()) {
+ UTF8StringSliceBase Left(const LengthType numberOfCharacters) const {
+ if (numberOfCharacters == UTF8Length()) {
return *this;
} else {
- return UTF8StringSliceBase(str, utf8Length);
+ return UTF8StringSliceBase(str, numberOfCharacters);
}
}
- UTF8StringSliceBase Right(const LengthType utf8Length) const {
- if (utf8Length == UTF8Length()) {
+ UTF8StringSliceBase Right(const LengthType numberOfCharacters) const {
+ if (numberOfCharacters == UTF8Length()) {
return *this;
} else {
const char* pstr = str + byteLength;
- for (size_t i = 0; i < utf8Length; i++) {
+ for (size_t i = 0; i < numberOfCharacters; i++) {
pstr = UTF8Util::PrevChar(pstr);
}
- return UTF8StringSliceBase(pstr, utf8Length);
+ return UTF8StringSliceBase(pstr, numberOfCharacters);
}
}
UTF8StringSliceBase SubString(const LengthType offset,
- const LengthType utf8Length) const {
+ const LengthType numberOfCharacters) const {
if (offset == 0) {
- return Left(utf8Length);
+ return Left(numberOfCharacters);
} else {
const char* pstr = str;
for (size_t i = 0; i < offset; i++) {
pstr = UTF8Util::NextChar(pstr);
}
- return UTF8StringSliceBase(pstr, utf8Length);
+ return UTF8StringSliceBase(pstr, numberOfCharacters);
}
}
for (size_t i = 0; i < utf8Length; i++) {
pstr = UTF8Util::NextChar(pstr);
}
- byteLength = pstr - str;
+ byteLength = static_cast<LengthType>(pstr - str);
}
const char* str;
#ifdef _MSC_VER
static std::string U16ToU8(const std::wstring& wstr) {
std::string ret;
- int convcnt = WideCharToMultiByte(CP_UTF8, 0, wstr.c_str(), wstr.length(), NULL, 0, NULL, NULL);
+ int length = static_cast<int>(wstr.length());
+ int convcnt = WideCharToMultiByte(CP_UTF8, 0, wstr.c_str(), length, NULL, 0, NULL, NULL);
if (convcnt > 0) {
ret.resize(convcnt);
- WideCharToMultiByte(CP_UTF8, 0, wstr.c_str(), wstr.length(), &ret[0], convcnt, NULL, NULL);
+ WideCharToMultiByte(CP_UTF8, 0, wstr.c_str(), length, &ret[0], convcnt, NULL, NULL);
}
return ret;
}
static std::wstring U8ToU16(const std::string& str) {
std::wstring ret;
- int convcnt = MultiByteToWideChar(CP_UTF8, 0, str.c_str(), str.length(), NULL, 0);
+ int length = static_cast<int>(str.length());
+ int convcnt = MultiByteToWideChar(CP_UTF8, 0, str.c_str(), length, NULL, 0);
if (convcnt > 0) {
ret.resize(convcnt);
- MultiByteToWideChar(CP_UTF8, 0, str.c_str(), str.length(), &ret[0], convcnt);
+ MultiByteToWideChar(CP_UTF8, 0, str.c_str(), length, &ret[0], convcnt);
}
return ret;
}
*/
#include "CmdLineOutput.hpp"
-#include "DartsDict.hpp"
-#include "TextDict.hpp"
+#include "DictConverter.hpp"
using namespace opencc;
-DictPtr LoadDictionary(const string& format, const string& inputFileName) {
- if (format == "text") {
- return SerializableDict::NewFromFile<TextDict>(inputFileName);
- } else if (format == "ocd") {
- return SerializableDict::NewFromFile<DartsDict>(inputFileName);
- } else {
- fprintf(stderr, "Unknown dictionary format: %s\n", format.c_str());
- exit(2);
- }
- return nullptr;
-}
-
-SerializableDictPtr ConvertDictionary(const string& format,
- const DictPtr dict) {
- if (format == "text") {
- return TextDict::NewFromDict(*dict.get());
- } else if (format == "ocd") {
- return DartsDict::NewFromDict(*dict.get());
- } else {
- fprintf(stderr, "Unknown dictionary format: %s\n", format.c_str());
- exit(2);
- }
- return nullptr;
-}
-
-void ConvertDictionary(const string inputFileName, const string outputFileName,
- const string formatFrom, const string formatTo) {
- DictPtr dictFrom = LoadDictionary(formatFrom, inputFileName);
- SerializableDictPtr dictTo = ConvertDictionary(formatTo, dictFrom);
- dictTo->SerializeToFile(outputFileName);
-}
-
int main(int argc, const char* argv[]) {
try {
TCLAP::CmdLine cmd("Open Chinese Convert (OpenCC) Dictionary Tool", ' ',